{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Read daily data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the daily Burbio table from the local CSV file.\n",
    "burbio_daily_data = pd.read_csv(\n",
    "    \"./data/burbio/burbio_daily_data.csv\"\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "# One-hot encode the three grade-band columns; every distinct value becomes\n",
    "# its own indicator column named \"<column>_<value>\" (e.g. \"k-5_V\").\n",
    "burbio_daily_data_with_dummies = pd.get_dummies(\n",
    "    burbio_daily_data,\n",
    "    columns=[\"gr 6-8\", \"gr 9-12\", \"k-5\"]\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "features = [u'gr 6-8_H', u'gr 6-8_T', u'gr 6-8_V', u'gr 6-8_Vac', u'gr 9-12_H',\n",
    "       u'gr 9-12_T', u'gr 9-12_V', u'gr 9-12_Vac', u'k-5_H', u'k-5_T',\n",
    "       u'k-5_V', u'k-5_Vac']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "meta_columns = [\"dt\", \"fips\"]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Compute daily sums for percentage calculation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Total of every indicator column for each (dt, fips) pair.\n",
    "fips_daily_sum = (\n",
    "    burbio_daily_data_with_dummies[meta_columns + features]\n",
    "    .groupby(meta_columns)\n",
    "    .sum()\n",
    "    .reset_index()\n",
    ")\n",
    "\n",
    "# Number of rows for each (dt, fips) pair, obtained by counting one indicator\n",
    "# column; the counted column is then renamed to \"cnt\".\n",
    "fips_daily_count = (\n",
    "    burbio_daily_data_with_dummies[meta_columns + [\"gr 6-8_H\"]]\n",
    "    .groupby(meta_columns)\n",
    "    .count()\n",
    "    .reset_index()\n",
    ")\n",
    "fips_daily_count.columns = [u'dt', u'fips', u'cnt']\n",
    "\n",
    "# Attach the row count to the sums so they can be turned into fractions.\n",
    "fips_daily_values = pd.merge(\n",
    "    fips_daily_sum,\n",
    "    fips_daily_count,\n",
    "    on=meta_columns\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Compute normalized feature values"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Turn the per-(dt, fips) sums into fractions of rows by dividing by \"cnt\".\n",
    "# The numerators are read from fips_daily_sum, which is never modified, instead\n",
    "# of from fips_daily_values itself; this keeps the cell idempotent, so running\n",
    "# it again cannot divide already-normalized values a second time.\n",
    "# Both frames hold one row per (dt, fips) key in the same order with a default\n",
    "# integer index (inner merge on unique keys), so the division aligns row by row.\n",
    "for f in features:\n",
    "    fips_daily_values[f] = fips_daily_sum[f] / fips_daily_values[\"cnt\"]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Convert the \"dt\" column to pandas datetimes; the weekly resample on \"dt\"\n",
    "# requires a datetime-like column.\n",
    "fips_daily_values[\"dt\"] = pd.to_datetime(fips_daily_values[\"dt\"])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Aggregate daily data into weekly means (weeks ending on Wednesday)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "# For every fips, bucket the daily rows into weeks ending on Wednesday\n",
    "# ('W-Wed'; closed/label \"right\" means each bin includes, and is stamped with,\n",
    "# its closing Wednesday), average each column within the bucket, then order\n",
    "# the result by week.\n",
    "weekly_data = (\n",
    "    fips_daily_values[meta_columns + features]\n",
    "    .groupby(\"fips\")\n",
    "    .resample('W-Wed', label=\"right\", closed=\"right\", on=\"dt\")\n",
    "    .mean()\n",
    "    .sort_values(by=\"dt\")\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Keep only the feature columns, then move the (fips, dt) index levels back\n",
    "# into ordinary columns. Run this once per resample: on a second run the index\n",
    "# is already flat, so the column selection would discard the fips/dt keys.\n",
    "weekly_data = weekly_data.loc[:, features].reset_index()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Write the weekly table to CSV with a header row and without the row index.\n",
    "weekly_data.to_csv(\n",
    "    \"./data/burbio/burbio_weekly_aggregated_data.csv\",\n",
    "    header=True,\n",
    "    index=False\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 2",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.13"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
